library(magrittr)
library(plyr)
library(tidyverse)
library(readxl)
library(vietnamcode)


rm(list=ls())
home = 'C:/Users/Jason/Dropbox/VNA_Responsiveness/Analysis/JOP-dataverse/'


papi = paste0(home, 'papi-education-formatted-2017.xlsx') %>%
  read_xlsx
pci  = paste0(home, 'pci-education-formatted-2017.xlsx') %>%
  read_xlsx
provinces = papi$province
papi = papi %>%
  subset(select=-c(province,
                   header)) %>%
  prcomp(scale.=T)
pci = pci %>%
  subset(select=-c(province,
                   header)) %>%
  prcomp(scale.=T)
combined = data.frame(PAPI=papi$x[,'PC1'], 
                      PCI=pci$x[,'PC1'], 
                      Province=provinces) %>%
  mutate(PAPI=PAPI*(-1), 
         PCI=PCI*(-1))


survey = paste0(home, 'survey-outcomes.xlsx') %>%
  read_xlsx %>%
  subset(!is.na(Treatment))
pooled_texts = paste0(home, 'pooled-outcomes.xlsx') %>%
  read_xlsx %>%
  mutate(Treatment=factor(x=Treatment, 
                          levels=c('Control',
                                   'Citizen',
                                   'Firm'))) %>%
  subset(!is.na(Treatment))
caucus_texts = paste0(home, 'caucus-outcomes.xlsx') %>%
  read_xlsx %>%
  mutate(Treatment=factor(x=Treatment, 
                          levels=c('Control',
                                   'Citizen',
                                   'Firm'))) %>%
  subset(!is.na(Treatment))
query_texts = paste0(home, 'query-outcomes.xlsx') %>%
  read_xlsx %>%
  mutate(Treatment=factor(x=Treatment, 
                          levels=c('Control',
                                   'Citizen',
                                   'Firm'))) %>%
  subset(!is.na(Treatment))
floor_texts = paste0(home, 'floor-outcomes.xlsx') %>%
  read_xlsx %>%
  mutate(Treatment=factor(x=Treatment, 
                          levels=c('Control',
                                   'Citizen',
                                   'Firm'))) %>%
  subset(!is.na(Treatment))


data("vietnamcode_data")
combined = combined %>%
  mutate(Province=mapvalues(x=as.character(Province), 
                            from=vietnamcode_data$province_name_diacritics, 
                            to=vietnamcode_data$province_name,
                            warn_missing=F))
combined$Province[str_detect(string=combined$Province, 
                             pattern='B\u00E0 R\u1ECBa\u2013V\u0169ng T\u00E0u')] = 'Ba Ria Vung Tau'
combined$Province[str_detect(string=combined$Province, 
                             pattern='Th\u1EEBa Thi\u00EAn\u2013Hu\u1EBF')] = 'Thua Thien - Hue'
combined$Province = mapvalues(x=combined$Province, 
                              from=sort(setdiff(combined$Province, unique(survey$Province))), 
                              to=sort(setdiff(unique(survey$Province), combined$Province)))
survey = merge(x=survey, 
               y=combined, 
               by='Province', 
               all.x=T)
pooled_texts = merge(x=pooled_texts, 
                     y=combined, 
                     by='Province', 
                     all.x=T)
floor_texts = merge(x=floor_texts, 
                    y=combined, 
                    by='Province', 
                    all.x=T)
query_texts = merge(x=query_texts, 
                    y=combined, 
                    by='Province', 
                    all.x=T)
caucus_texts = merge(x=caucus_texts, 
                     y=combined, 
                     by='Province', 
                     all.x=T)


fit_1 = lm(Q1 ~ Citizen*PAPI + Firm*PCI + Prop.Citizen + Prop.Firm + FullTime + CentNom + Competitive, 
           data=survey)
fit_2 = lm(Spoke ~ Citizen*PAPI + Firm*PCI + Prop.Citizen + Prop.Firm + FullTime + CentNom + Competitive, 
           data=pooled_texts)
fit_3 = lm(Spoke ~ Citizen*PAPI + Firm*PCI + Prop.Citizen + Prop.Firm + FullTime + CentNom + Competitive, 
           data=floor_texts)
fit_4 = lm(Spoke ~ Citizen*PAPI + Firm*PCI + Prop.Citizen + Prop.Firm + FullTime + CentNom + Competitive, 
           data=query_texts)
fit_5 = lm(Spoke ~ Citizen*PAPI + Firm*PCI + Prop.Citizen + Prop.Firm + FullTime + CentNom + Competitive, 
           data=caucus_texts)
fit_1_cluster = cluster.vcov(model=fit_1, ~ Province)
fit_2_cluster = cluster.vcov(model=fit_2, ~ Province)
fit_3_cluster = cluster.vcov(model=fit_3, ~ Province)
fit_4_cluster = cluster.vcov(model=fit_4, ~ Province)
fit_5_cluster = cluster.vcov(model=fit_5, ~ Province)


stargazer(coeftest(x=fit_1, vcov.=fit_1_cluster),
          coeftest(x=fit_2, vcov.=fit_2_cluster),
          coeftest(x=fit_3, vcov.=fit_3_cluster),
          coeftest(x=fit_4, vcov.=fit_4_cluster),
          coeftest(x=fit_5, vcov.=fit_5_cluster),
          title='PCA-treatment interactions. Multiplicative interactions between 1) PAPI scores and citizen treatment; and 2) PCI scores and firm treatment.', 
          covariate.labels=c('Citizen',
                             'PAPI',
                             'Citizen $times$ PAPI',
                             '% Citizen',
                             'Firm',
                             'PCI',
                             'Firm $times$ PCI',
                             '% Firm',
                             'Full-time',
                             'Central Nominee',
                             'Competitive',
                             'Constant'),
          order=c(1:2, 10, 5, 3:4, 11, 6, 7:9, 12),
          align=T, 
          header=F, 
          no.space=T, 
          star.cutoffs=NA,
          column.labels=c('Delegate prepared','Delegate spoke'),
          column.separate=c(1, 4),
          dep.var.labels.include=F,
          label='appendix-table-06.1',
          notes.append=F, 
          notes='Province-clustered standard errors in parentheses.') %>%
  writeClipboard
stargazer(fit_1, # puts N and R^2 in table
          fit_2, 
          fit_3, 
          fit_4, 
          fit_5) 

